import lxml.etree
from lxml import etree

# Basic lxml usage: parse a local HTML file, then evaluate a series of
# XPath expressions against it and print each result list.
tree = etree.parse('demo.html', etree.HTMLParser())

# The queries are run in the order listed; one result list is printed per query.
xpath_queries = (
    # Text of every <li> that is a direct child of a <ul>.
    '//ul/li/text()',
    # Exact attribute matches on id / class.
    '//li[@id="1"]/text()',
    '//li[@class="l1"]/text()',
    # The value of the class attribute itself rather than the element text.
    '//li[@id="1"]/@class',
    # Substring matches on attribute values.
    '//li[contains(@id,"1")]/text()',
    '//li[contains(@class,"l1")]/text()',
    # Prefix match on the class attribute.
    '//ul/li[starts-with(@class,"c")]/text()',
    # Both predicates must hold.
    '//li[@id="1" and @class="l1"]/text()',
)
for query in xpath_queries:
    li_list = tree.xpath(query)
    print(li_list)

# Fetch the Baidu home page and look up the label of its search button
# (the value attribute of the <input id="su"> element).

import urllib.request

url = "https://www.baidu.com/"
# The request carries a desktop-browser User-Agent header
# (presumably so the site serves the regular page -- confirm if it matters).
requests = urllib.request.Request(
    url,
    headers={
        'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36",
    },
)
# Use the response as a context manager so the underlying connection is
# closed even if reading or decoding raises.
with urllib.request.urlopen(requests) as response:
    # Honour the charset declared in the Content-Type header; fall back to
    # UTF-8 when the server does not declare one.
    charset = response.headers.get_content_charset() or 'utf-8'
    context = response.read().decode(charset)
print(context)

# Parse the downloaded markup and extract the button's value attribute.
html = etree.HTML(context)
xpath = html.xpath('//span/input[@id="su"]/@value')
print(xpath)


